https://fred.stlouisfed.org/series/AWHAE Our data is FRED creating an index (2007=100) of aggregate weekly hours reported on a monthly basis. The indexes of aggregate weekly hours are calculated by dividing the current month’s aggregate hours by the average of the 12 monthly figures, for the base year (2007).

1 Data Load

# Read in data: FRED AWHAE — index of aggregate weekly hours (2007 = 100)
working_hours <- read_excel("../data/ch10_Weekly_Hours.xls")

# Add year/month helper columns and the first difference of the index.
# Use bare column names inside mutate(): referencing working_hours$... inside
# the pipe re-reads the ORIGINAL data frame and silently breaks if any earlier
# step filters or reorders rows.
working_hours <- working_hours %>%
  mutate(year = year(observation_date),
         month = month(observation_date),
         first_difference_index =
           hours_worked_index - lag(hours_worked_index))

# Monthly time series object starting March 2006
working_hours_ts <- ts(working_hours$hours_worked_index,
                       frequency = 12,
                       start = c(2006, 3))

# First difference of the hours worked index
working_hours_diff <- diff(working_hours_ts)

# Log of the hours worked index
working_hours_log <- log(working_hours_ts)

2 EDA Raw Data

# Static plot of the raw index with recession windows shaded.
plot_work_hours <- ggplot(working_hours, aes(x = observation_date,
                                             y = hours_worked_index)) +
  geom_line(size = 1) +
  stat_smooth(color = "blue", size = .4) +
  # annotate() draws each recession band exactly once. The original used
  # geom_rect(aes(...)) with fixed coordinates inside aes(), which draws one
  # rectangle PER DATA ROW and stacks the alpha (hence the alpha = .01 hack).
  annotate("rect",
           xmin = as.POSIXct('2020-02-01'), xmax = as.POSIXct('2020-04-01'),
           ymin = -Inf, ymax = Inf,
           linetype = 'dashed', color = "light green",
           fill = 'light green', alpha = 0.5) +
  annotate("rect",
           xmin = as.POSIXct('2007-12-01'), xmax = as.POSIXct('2008-07-01'),
           ymin = -Inf, ymax = Inf,
           linetype = 'dashed', color = "light green",
           fill = 'light green', alpha = 0.5) +
  # Dashed red line marks the full-sample mean of the index
  geom_hline(yintercept = mean(working_hours$hours_worked_index),
             linetype = "dashed", color = "red") +
  scale_x_datetime(labels = date_format("%Y"), breaks = "1 year") +
  labs(x = 'Date', y = 'Indexed Working Hours',
       title = 'Mar 2006 - MAR 2023 Agg. Weekly Work Hours Index Indexed(2007=100)',
       subtitle = 'Recession(s) highlighted  Red Dashed Line = Time Series Mean') +
  theme(axis.text.y = element_text(face = "bold", color = "black", size = 10, angle = 0),
        axis.ticks.y = element_blank(),
        axis.title = element_text(face = "bold", color = "black", size = 10),
        axis.text.x = element_text(face = "bold", color = "black", size = 10),
        panel.background = element_rect('White'),
        legend.position = "none")
plot_work_hours

# Interactive Plots
ggplotly(plot_work_hours, originalData = TRUE, tooltip = "y")
ts_plot(working_hours_ts, slider = TRUE,
        title = "Mar 2006 - MAR 2023 Agg. Weekly Work Hours Index Indexed(2007=100)",
        width = 2, type = "single", Xtitle = "Date", Ytitle = "Index Value - Base 2007 = 100",
        color = "gold", Xgrid = TRUE, Ygrid = TRUE
        ) %>%
    layout(paper_bgcolor = "black",
         plot_bgcolor = "black",
         font = list(color = "white"),
         yaxis = list(linecolor = "white",
                      zerolinecolor = "white",
                      gridcolor= "white"),
         xaxis = list(linecolor = "white",
                      zerolinecolor = "white",
                      gridcolor= "white"))
# Heatmap: Log Working Hours Time Series By Year and Month
ts_heatmap(working_hours_log, 
           title = "Mar 2006 - MAR 2023 Agg. Weekly Work Hours Index Indexed(2007=100)")

The dotted-red line is the mean. The black line is the index value, with a trend line in blue. The green periods are recessions. There is no doubt there is an overall upward trend in the data, and seasonality must be explored. The heat map is an interesting way to visualize the variance of index values through the years, especially in capturing economic recessionary time periods.

2.1 Summary Stats Grouped Year - plot

# Annual mean +/- 1 SD of the index, sorted by the annual mean.
# The grouping column (year) is returned by summarise() automatically, so it
# must not be listed again inside summarise(); .groups = "drop" ungroups the
# result so downstream code sees a plain data frame.
stats_year_base <- working_hours %>%
  group_by(year) %>%
  summarise(avg = mean(hours_worked_index),
            median = median(hours_worked_index),
            stand_dev = sd(hours_worked_index),
            .groups = "drop") %>%
  ggplot(aes(x = reorder(year, avg), y = avg)) +
  geom_point() +
  # Error bars span one standard deviation either side of the annual mean
  geom_errorbar(aes(ymin = avg - stand_dev, ymax = avg + stand_dev)) +
  coord_flip() +
  labs(title = 'MAR 2006 - Mar2023: Annual Mean & Variance', subtitle = '',
       y = "Index Value with Base Year 2007 = 100", x = '') +
  theme(axis.text.y = element_text(face = "bold", color = "black", size = 10, angle = 0),
        axis.ticks.y = element_blank(),
        axis.title = element_text(face = "bold", color = "black", size = 10),
        axis.text.x = element_text(face = "bold", color = "black", size = 10),
        panel.background = element_rect('White'),
        legend.position = "none")
stats_year_base

# Interactive version of the same chart
ggplotly(stats_year_base)

This chart looks at the variance in index of hours worked by year from 2006-2023 and can shed some insight about variance of indexed values for working hours.

2.2 Summary Stats Grouped Month - Plot

# Monthly mean +/- 1 SD of the index across all years.
stats_month_base <- working_hours %>%
  mutate(month_name = as.factor(month.abb[month])) %>%
  group_by(month_name) %>%
  # The grouping column comes back automatically; listing it again inside
  # summarize() is redundant. .groups = "drop" returns an ungrouped frame.
  summarize(avg = mean(hours_worked_index),
            median = median(hours_worked_index),
            stand_dev = sd(hours_worked_index),
            .groups = "drop") %>%
  ggplot(aes(x = reorder(month_name, avg), y = avg)) +
  geom_point() +
  geom_errorbar(aes(ymin = avg - stand_dev, ymax = avg + stand_dev)) +
  coord_flip() +
  # Subtitle corrected: the error bars show mean +/- 1 SD, not min/mean/max.
  labs(title = 'MAR 2006 - Mar 2023 Variance by Month  Base 2007 = 100',
       subtitle = 'Mean +/- 1 SD',
       y = "", x = '') +
  theme(axis.text.y = element_text(face = "bold", color = "black", size = 10, angle = 0),
        axis.ticks.y = element_blank(),
        axis.title = element_text(face = "bold", color = "black", size = 10),
        axis.text.x = element_text(face = "bold", color = "black", size = 10),
        panel.background = element_rect('White'),
        legend.position = "none")

stats_month_base

# Interactive version of the same chart
ggplotly(stats_month_base)

Variance in index of hours worked by month from 2006-2023 provides insight about cycles in working hours.

2.3 Boxplot by Month to Evaluate Seasonality

# Boxplot for cycle associations by month
ts_plot_season <- function(x = x) {
season <- cycle(x)
season.factor <- factor(season)
ggplot() + 
  geom_boxplot(mapping = aes(x = season.factor,
                             y = x)) +
  labs(x = "Month", y =  "Index Hours Worked",
       title = "Seasonality/Cycles: Agg Weekly Hours Indexed(2007 = 100) reported monthly") +
  theme(axis.text.y=element_text(face="bold", color="black", size=10, angle=0),
      axis.ticks.y=element_blank(),
      axis.title = element_text(face = "bold", color="black", size=10),
      axis.text.x = element_text(face = "bold", color="black", size=10),
      panel.background =element_rect('White'),
      legend.position = "none")
}
raw_season <- ts_plot_season(working_hours_ts) +
  ggtitle("Raw Data Seasonality/Cycles by Month All Years, Base 2007 = 100")
raw_season  

# Interactive graphs
ts_seasonal(working_hours_ts, type = "box", 
            title = "Raw Data Seasonality/Cycles by Month All Years, Base 2007 = 100")

This box plot is another method to look at potential seasonality by month from 2006-2023. The Q1 and Q4 are slightly higher than Q2 and Q3 as present in the box plot which is indicative of a cycle. First let’s address the Trend and maybe the seasonality with taking the first difference. It is clearly evident that taking the first-difference of indexed hours worked is worth exploring.

3 Dickey-Fuller Check For Stationary Time Series

# Augmented Dickey-Fuller test on the raw index; type = "none" = no drift or
# trend term, extra lag length chosen by AIC. Null hypothesis: unit root.
adf_base <- ur.df(working_hours_ts, type = "none", selectlags = "AIC")
summary(adf_base)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.6310  -0.1820   0.1173   0.3199   3.6586 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## z.lag.1     0.0007647  0.0008933   0.856    0.393
## z.diff.lag -0.0224307  0.0705693  -0.318    0.751
## 
## Residual standard error: 1.302 on 201 degrees of freedom
## Multiple R-squared:  0.003956,   Adjusted R-squared:  -0.005955 
## F-statistic: 0.3992 on 2 and 201 DF,  p-value: 0.6714
## 
## 
## Value of test-statistic is: 0.8561 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62

The test statistic is higher than all tau values for 1, 5 and 10 percent and p-value is .6714. Transforming working hours index with the first difference of working hours is pragmatic to achieve stationarity. The team decided to evaluate log of working hours and test for stationarity.

3.1 ADF/stationarity test of Log of Working Hours index

# ADF test on log(index): type = "none" (no drift/trend), zero extra lags
df_log_data = ur.df(working_hours_log, type = "none", selectlags = "AIC", lags = 0)
summary(df_log_data) # t-stat = 0.8554 and tau1 at 10pct = -1.62, p-value = 0.3933,
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.163782 -0.001772  0.001058  0.002846  0.042298 
## 
## Coefficients:
##          Estimate Std. Error t value Pr(>|t|)
## z.lag.1 0.0001661  0.0001942   0.855    0.393
## 
## Residual standard error: 0.01283 on 203 degrees of freedom
## Multiple R-squared:  0.003591,   Adjusted R-squared:  -0.001317 
## F-statistic: 0.7317 on 1 and 203 DF,  p-value: 0.3933
## 
## 
## Value of test-statistic is: 0.8554 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
# thus log_work_hours not stationary and first difference required.

t-stat = 0.8554 and tau1 at 10pct = -1.62, p-value = 0.3933, thus log_work_hours not stationary and first difference required.

3.2 Test Number of Differences to Log of Working Hours Required to Get Stationary Data

# ndiffs() reports how many differences make the log series stationary (ADF)
lag_order_log <-ndiffs(working_hours_log, type = "trend", test = "adf")
lag_order_log # 1
## [1] 1

3.3 ADF/Stationarity Test AFTER Executing First Difference of Log Working Hours Transformation

# First-difference the log series, then re-run the ADF test on the result
working_hours_log_diff <- diff(working_hours_log) # create time-series
df_diff_log_data = ur.df(working_hours_log_diff, type = "none", selectlags = "AIC",lags = 0)
summary(df_diff_log_data) 
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.163640 -0.001007  0.001867  0.003638  0.037781 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)    
## z.lag.1  -1.0337     0.0703   -14.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01288 on 202 degrees of freedom
## Multiple R-squared:  0.517,  Adjusted R-squared:  0.5146 
## F-statistic: 216.2 on 1 and 202 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -14.7033 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62

t-stat is -14.7033, which is < tau1 at -2.58 and p-value = 2.2e-16 and AR^2 =0.5146

3.4 ADF/Stationarity Test AFTER First Difference of Working Hours Transformation Executed

# ADF on the first-differenced raw index (no deterministic terms, 0 extra lags)
df_diff_data = ur.df(working_hours_diff, type = "none", selectlags = "AIC", lags =0)
summary(df_diff_data)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.5378  -0.1018   0.1964   0.3973   3.8030 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)    
## z.lag.1 -1.01800    0.07033  -14.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.301 on 202 degrees of freedom
## Multiple R-squared:  0.5091, Adjusted R-squared:  0.5067 
## F-statistic: 209.5 on 1 and 202 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -14.474 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62

t-stat is -14.474, which is < tau1 at the 1pct level, with a p-value = 2.2e-16 and AR^2 = 0.5067 (only 0.0079 less than diff(log(working_hours_ts))). There is only a marginal benefit to taking the log of working hours and then differencing the logged series. The Dickey-Fuller test on the first difference of working hours suggests that we now have stationary data: the p-value is below 0.01 and the test statistic of -14.474 is less than the critical value of -2.58 at the 99 percent confidence level, so we can reject the null hypothesis that the data is non-stationary. Transforming the working hours index by first differencing is sufficient for a stationary time series, and we chose not to use the log of working hours followed by differencing. Let's double-check the stationarity of our time series with the Phillips-Perron and KPSS tests.

3.5 ndiffs test on Differenced Data to Check for Stationarity.

# How many differences does the raw series need? (expect 1)
lag_order <- ndiffs(working_hours_ts, test = "adf")  # Using ADF test for lag order selection
lag_order
## [1] 1
# And the already-differenced series? (expect 0 — no further differencing)
lag_order_check <- ndiffs(working_hours_diff, test = "adf")  # Using ADF test for lag order selection
lag_order_check
## [1] 0

3.6 Phillips Perron Test Stationary Time-Series

# Phillips-Perron unit-root test (null: unit root). Raw series: p = 0.4663,
# cannot reject the unit root.
pp.test(working_hours_ts)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  working_hours_ts
## Dickey-Fuller Z(alpha) = -11.391, Truncation lag parameter = 4, p-value
## = 0.4663
## alternative hypothesis: stationary
# Differenced series: p-value reported at its floor of 0.01 — reject the
# unit root, consistent with the ADF results.
pp.test(working_hours_diff)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  working_hours_diff
## Dickey-Fuller Z(alpha) = -184.69, Truncation lag parameter = 4, p-value
## = 0.01
## alternative hypothesis: stationary

Confirms data is stationary.

3.7 KPSS Test

# KPSS test — note the null is REVERSED vs. ADF: null = stationary.
# Raw series: statistic 3.2645 exceeds every critical value, reject stationarity.
kpss_base <- ur.kpss(working_hours_ts)
summary(kpss_base)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 3.2645 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
# First difference: statistic 0.1174 is below all critical values, so we
# fail to reject stationarity of the differenced series.
kpss_first_difference <- ur.kpss(working_hours_diff)
summary(kpss_first_difference)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 4 lags. 
## 
## Value of test-statistic is: 0.1174 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

The undifferenced time series has a test statistic of 3.2645, which is much higher than the critical value at the 10 pct significance level, so we reject the KPSS null of stationarity. When we difference working hours and run KPSS, the test statistic is 0.1174, below the critical value even at the 10 pct level. This confirms the Dickey-Fuller finding that differencing hours worked makes the time series stationary.

3.8 EDA & Evaluate Stationarity on Transformed First Differenced

# create First Difference of Hours Worked Index
# NOTE(review): working_hours_diff was already created in the data-load chunk;
# this reassignment recomputes the identical series.
working_hours_diff = diff(working_hours_ts)

## Comparison of First Difference vs. Raw Data (two stacked base-R panels)
par(mfrow = c(2,1), mex = 0.6, cex = 0.8)
plot(working_hours_ts, main = 'Index of avg Hours Worked (Base = 2007 at 100) Red Line = Time-Series Mean', 
        xlab = 'Date', ylab = 'Index Value ',type = "l", col = 'blue')
    abline(h = mean(working_hours_ts), col = "red")

plot(working_hours_diff, main = 'First Difference of Working Hours', xlab = 'Date',
     ylab = 'First Difference Index Value')
     lines(working_hours_diff, col = 'blue')
     abline(h = mean(working_hours_diff), col = 'red')

# Year-by-month heatmap of the differenced series
ts_heatmap(working_hours_diff, 
           title = "Mar 2006 - MAR 2023 First Differenced Agg. Weekly Work Hours Index Indexed(2007=100)")
# Interactive Chart: raw and differenced series as two stacked panels
interactive_raw_first_difference_plot <- cbind(working_hours_ts,working_hours_diff)
ts_plot(interactive_raw_first_difference_plot, width = 3, type = "multiple",
        title = "Raw Vs. First Difference Agg. Weekly Work Hours Index Indexed(2007=100)"
        , Xtitle = "Date", Ytitle = "Index Value - Base 2007 = 100",
        color = "gold", Xgrid = FALSE, Ygrid = FALSE,
        ) %>%
    layout(paper_bgcolor = "black",
         plot_bgcolor = "black",
         font = list(color = "white"),
         yaxis = list(linecolor = "white",
                      zerolinecolor = "white",
                      gridcolor= "white"),
         xaxis = list(linecolor = "white",
                      zerolinecolor = "white",
                      gridcolor= "white")
         )      

We can see that using the first difference removes the trend with the COVID outlier remaining. Let’s look at a histogram of raw data vs. first difference data.

3.9 Histogram data

# Histogram of the raw index with a kernel density overlay
hist_base <- gghistogram(working_hours_ts, fill = "blue", color = 'black', alpha = 0.50,
                    title ='Mar 2006 - MAR 2023', 
                    subtitle = 'Agg. Weekly Work Hours Index Indexed(2007=100)',
                    xlab = 'Index Value: Base (2007 = 100)', ylab = 'Frequency')  +
  geom_density(color = 'red' )

# Histogram of the first difference; x-axis clipped to [-5, 5], which drops
# the extreme COVID-era observations from the display
hist_diff <- gghistogram(working_hours_diff, fill = "blue", color = 'black', alpha = 0.50,
                    title ='First Difference: Mar 2006 - MAR 2023   n = 204', 
                    subtitle = 'Agg. Weekly Work Hours Index Indexed(2007=100)',
                    xlab = 'Index Value: Base (2007 = 100)', ylab = 'Frequency')  +
                    xlim(-5,5) +
  geom_density(color = 'red' )

# Combine the raw-data and first-difference histograms in one figure
hist_combo <- ggarrange(hist_base, hist_diff, nrow = 2, label.y  = 0.3,
                        font.label = list(size = 8, color = "black", face = "bold", family = NULL))
plot(hist_combo)

# Interactive histograms: raw data & first difference side by side
hist_final <- subplot(ggplotly(hist_base), ggplotly(hist_diff))
hist_final

Here we can see the distribution differences between the raw data and the first difference data. The first difference data has a more normal distribution indicating that the data is possibly stationary and that we can model on it using the regression assumption necessary to draw inferences. Let’s look at boxplot once again and compare raw data vs. first difference of index of working hours.

3.10 Boxplot of Raw vs. First Difference Data

# Monthly boxplots: raw series vs. first difference, to compare seasonality
base_boxplot <- ts_seasonal(working_hours_ts, type = "box", 
                title = "Raw Data Seasonality/Cycles by Month All Years, Base 2007 = 100")
base_boxplot
first_diff_boxplot <- ts_seasonal(working_hours_diff, type = "box",
  title = "Raw & First Difference Seasonality/Cycles by Month All Years, Base 2007 = 100")
first_diff_boxplot
# Stack both plotly objects into a single two-row figure with a shared x-axis
comb_box <- subplot(ggplotly(base_boxplot), ggplotly(first_diff_boxplot), nrows = 2, shareX = TRUE)
comb_box

The variance has decreased, let’s look at seasonality.

3.11 Scatter plot by Year to Evaluate Seasonality (Raw data vs. First Difference)

## Seasonality check 
# Seasonal plot: one line per year across the 12 months (raw data)
season_raw <- ggseasonplot(working_hours_ts) +
  ylab("Index Hours") +
  ggtitle("Raw:All Years Seasonal Plot") +
  theme_classic2()

# Interactive Chart
ggplotly(season_raw)
# Seasonal plot of the first-differenced series
season_first_diff <- ggseasonplot(working_hours_diff) +
  ylab("Index Hours") +
  ggtitle("Base Vs.First Difference: Seasonal Plot All Years") +
  theme_classic2()

# Interactive Chart
ggplotly(season_first_diff)
# Combined Plot of Raw vs. First Differenced Hours Worked Index
combo_diff_sec_diff_scatter <- ggarrange(season_raw, season_first_diff,
          nrow = 2, common.legend = TRUE, legend = 'right', label.x = '')
combo_diff_sec_diff_scatter

# Interactive Chart of Seasonality Raw Data & First Difference Hours Worked
subplot(ggplotly(season_raw), ggplotly(season_first_diff), nrows = 2, shareX = TRUE)

There are 17 observations for each month. April has the lowest aggregated index value at 99.9 against the 2007 base, and December the highest at 102.9. The seasonality plot for all years and months highlights the Great Recession and COVID periods: their lines (illuminated in purple) do not resemble any of the other years' paths across the twelve months from 2006-2023.

3.12 Decompose data to inspect trend, seasonal and random events

# Classical additive decomposition: observed = trend + seasonal + remainder
decomp_work_hours_ts <- decompose(working_hours_ts, "additive")
plot(decomp_work_hours_ts)

# Interactive Charts
# NOTE(review): this reassignment overwrites the stats::decompose() result
# above with a TSstudio plot object of the same name — use a distinct name
# if the decompose() components are needed later.
decomp_work_hours_ts <- ts_decompose(working_hours_ts, "additive", showline = TRUE)
decomp_work_hours_ts
# First Difference Hours Worked Index decomposition
decomp_work_hours_diff <- ts_decompose(working_hours_diff, "additive", showline = TRUE)
decomp_work_hours_diff

The width and height of seasonal cycles over time is predictable and trend is fairly linear with the first difference of hours worked, or seasonal variation is relatively constant over time. Thus, decomposition additively (Yt = Tt + St + Rt) seems appropriate. We’ll check if differencing of seasonality required using nsdiffs() function.

3.13 Seasonal Differencing Check

working_hours_diff %>% nsdiffs()
## [1] 0

We see that seasonal differencing is not required for our time series, let’s now check the correlograms.

3.14 Raw & First Difference Plot the autocorrelation and partial autocorrelations

# Correlogram of raw time series
acf_12_plot <- ggAcf(working_hours_ts, lag.max = 12,  
             main = "ACF Base Time Series")
pacf_12_plot <- ggPacf(working_hours_ts, lag.max = 12,
               main = "PACF Base Time Series")

acf_pacf_12_plot <- ggarrange(acf_12_plot, pacf_12_plot, nrow = 2) 
acf_pacf_12_plot

# Correlogram of first difference index of hours worked
acf_12_diff <- ggAcf(working_hours_diff, lag.max = 12,  
             main = "First Difference ACF - Lag = 12")
pacf_12_diff <- ggPacf(working_hours_diff, lag.max = 12,
               main = "First Difference PACF - Lag = 12")

acf_pacf_12_diff <- ggarrange(acf_12_diff, pacf_12_diff, nrow = 2)
acf_pacf_12_diff

acf_pacf_combo <- ggarrange(acf_pacf_12_plot, acf_pacf_12_diff, nrow = 1, ncol = 2)
acf_pacf_combo

#Interactive Chart
ts_cor(working_hours_diff, seasonal = FALSE, lag.max = 24)

ACF and PACF spikes at the second lag suggest that a combined ARMA model of order 2 may be appropriate to consider. The MA(2), AR(2) and ARMA(2,2) models are still intuitive candidates to research further for modeling, as the PACF alternates through the lag periods.

4 Model Fitting and Analysis (2,1,2)

# Fit ARIMA(2,1,2): AR and MA terms of order 2 with one difference (d = 1),
# i.e. an ARMA(2,2) on the first-differenced index.
model_ARMA212 = arima(working_hours_ts, order = c(2, 1, 2))
summary(model_ARMA212) 
## 
## Call:
## arima(x = working_hours_ts, order = c(2, 1, 2))
## 
## Coefficients:
##          ar1     ar2      ma1      ma2
##       0.1216  0.0316  -0.1472  -0.1866
## s.e.  0.4301  0.3498   0.4226   0.3469
## 
## sigma^2 estimated as 1.634:  log likelihood = -339.57,  aic = 689.14
## 
## Training set error measures:
##                     ME     RMSE       MAE        MPE      MAPE    MASE
## Training set 0.1050544 1.275151 0.4648785 0.08895701 0.4634784 1.03645
##                      ACF1
## Training set -0.006881475
# z-tests on the individual ARMA coefficients (none significant here)
coeftest(model_ARMA212)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)
## ar1  0.121647   0.430083  0.2828   0.7773
## ar2  0.031596   0.349790  0.0903   0.9280
## ma1 -0.147186   0.422586 -0.3483   0.7276
## ma2 -0.186572   0.346882 -0.5379   0.5907
# Inverse-root plot: roots inside the unit circle indicate a stable model
autoplot(model_ARMA212)

# BIC penalizes parameters more heavily than AIC; useful for model comparison
BIC(model_ARMA212)
## [1] 705.734
# Root test. An ARMA model is STATIONARY when the roots of the AR polynomial
# (1 - phi1*z - phi2*z^2) lie OUTSIDE the unit circle, and INVERTIBLE when the
# roots of the MA polynomial (1 + theta1*z + theta2*z^2) lie outside it.
# The original code concatenated ALL coefficients into one polynomial and
# tested |root| < 1, which answers neither question.
ar_coefs <- model_ARMA212$coef[grep("^ar", names(model_ARMA212$coef))]
ma_coefs <- model_ARMA212$coef[grep("^ma", names(model_ARMA212$coef))]
if (all(abs(polyroot(c(1, -ar_coefs))) > 1)) {
  message("AR roots outside unit circle: model is stationary.")
} else {
  message("Model is not stationary.")
}
if (all(abs(polyroot(c(1, ma_coefs))) > 1)) {
  message("MA roots outside unit circle: model is invertible.")
} else {
  message("Model is not invertible.")
}

REGRESSION INTERPRETATION(S):

# R-Squared 
# NOTE(review): these are pseudo R-squared values computed on the LEVELS of
# the series; for a d = 1 model, level R^2 is inflated by the trend —
# interpret with care.
n = length(working_hours_ts)
arma22_R2 = cor(fitted(model_ARMA212), working_hours_ts)^2
# Adjusted for the 4 estimated coefficients (ar1, ar2, ma1, ma2)
arma22_AR2 = 1 - (1-arma22_R2) * (n - 1) / (n - 4 - 1)
arma22_R2 # 0.9618898
## [1] 0.9618898
arma22_AR2 # 0.9611276
## [1] 0.9611276
# Get the residuals from the ARIMA model
arma_residuals <- residuals(model_ARMA212)

# NOTE(review): y is never used below (n is recomputed instead); it is the
# length of working_hours_ts, not working_hours_diff as originally commented.
y <- length(as.vector(working_hours_ts))

# Calculate the total sum of squares (TSS)
mean_y <- mean(as.vector(working_hours_ts))
tss <- sum((as.vector(working_hours_ts) - mean_y)^2)

# Calculate the residual sum of squares (RSS)
rss <- sum(arma_residuals^2)

# Calculate the number of predictors or parameters in the model (p)
p <- length(model_ARMA212$coef)

# Calculate the number of observations (n)
n <- length(working_hours_ts)

# Calculate R-squared (1 - RSS/TSS, the standard definition)
rsquared <- 1 - (rss / tss)

# Calculate the adjusted R-squared
adj_rsquared <- 1 - ((1 - rsquared) * (n - 1) / (n - p - 1))

# Print the calculated R-squared and adjusted R-squared values
print(rsquared)
## [1] 0.9616042
# adj_rsquared <- 1 - rsquared
# adj_rsquared
print(adj_rsquared)
## [1] 0.9608363
# Residuals check ACF/PACF
check_res(model_ARMA212)
# Plot residuals in ACF/PACF (acf()/pacf() also draw as a side effect)
arma212_acf_residuals <- acf(model_ARMA212$residuals, main = "ACF Residuals")

arma212_pacf_residuals <- pacf(model_ARMA212$residuals, main = "PACF Residuals")

# 3-panel layout: residual ACF, residual PACF, and a normal Q-Q plot
par(mfrow = c(3,1) , mex = 0.6, cex = 0.8, cex.main = .9)
plot(arma212_acf_residuals)
plot(arma212_pacf_residuals)
qqnorm(residuals(model_ARMA212), main = 'ARMA(2,1,2) Residuals')
qqline(model_ARMA212$residuals, col = 'red')

# Q-Test: Ljung-Box on the residuals at lag 5. A large p-value means no
# evidence of remaining autocorrelation in the residuals.
r_2_box_test <- Box.test(model_ARMA212$residuals, lag = 5, type = "Ljung")
r_2_box_test
## 
##  Box-Ljung test
## 
## data:  model_ARMA212$residuals
## X-squared = 0.22148, df = 5, p-value = 0.9989
# Collect Ljung-Box results for lags 1..5. The plot below is labeled
# "p-value" and draws a 0.05 reference line, so store qt$p.value — the
# original stored qt$statistic, which made the axis label and the
# threshold line meaningless.
QTest <- numeric(5)
for (i in 1:5) {
  qt <- Box.test(model_ARMA212$residuals, lag = i, type = "Ljung")
  QTest[i] <- qt$p.value
}

QTest
plot(QTest, main = "ARMA(2,1,2) Q Test", xlab = "lag", ylab = "p-value")
abline(h = .05, col = "red", lty = 3)

# Forecast 6 months ahead; plot shows the last 12 observations plus forecast
fcast_ARMA212 = forecast(model_ARMA212, h=6)
autoplot(fcast_ARMA212, include = 12, ylab = "Working hours - Base 2007 with 100") 

summary(fcast_ARMA212)
## 
## Forecast method: ARIMA(2,1,2)
## 
## Model Information:
## 
## Call:
## arima(x = working_hours_ts, order = c(2, 1, 2))
## 
## Coefficients:
##          ar1     ar2      ma1      ma2
##       0.1216  0.0316  -0.1472  -0.1866
## s.e.  0.4301  0.3498   0.4226   0.3469
## 
## sigma^2 estimated as 1.634:  log likelihood = -339.57,  aic = 689.14
## 
## Error measures:
##                     ME     RMSE       MAE        MPE      MAPE      MASE
## Training set 0.1050544 1.275151 0.4648785 0.08895701 0.4634784 0.1573234
##                      ACF1
## Training set -0.006881475
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Apr 2023       114.8934 113.2552 116.5315 112.3880 117.3987
## May 2023       114.8814 112.5941 117.1687 111.3833 118.3796
## Jun 2023       114.8798 112.2302 117.5294 110.8276 118.9320
## Jul 2023       114.8792 111.9259 117.8325 110.3625 119.3959
## Aug 2023       114.8791 111.6554 118.1028 109.9489 119.8093
## Sep 2023       114.8790 111.4069 118.3512 109.5688 120.1892
# Interactive Plot of the 6-month ARIMA(2,1,2) forecast
fcast_ARMA212_plot <- plot_forecast(fcast_ARMA212, title = "ARMA(2,1,2) h =6", 
                                 Ytitle = "index-Base 2007 = 100", Xtitle="Date")
fcast_ARMA212_plot

So we have a reasonably good fit here with the ARIMA(2,1,2) model: the AIC is 689.14, which is relatively low, the adjusted r-squared is 0.9611276, and the phi parameters are both < 1, suggesting a good fit. As well, the RMSE is 1.2752, which suggests that this model's predictions are roughly 1.28 units — in our case index points — away from actual values. This is a good start.

The Ljung-Box statistic is used to test whether all underlying population autocorrelations of the errors may be zero. Our test statistic was 0.22148 with a p-value of 0.9989 given 5 lags; since the p-value is well above the 0.05 significance threshold, we fail to reject the null hypothesis that the residuals are independently distributed.

5 AR(2)

# Fit ARIMA(2,1,0): AR(2) on the first-differenced index (no MA terms).
model_AR210 = arima(working_hours_ts, order = c(2, 1, 0))
summary(model_AR210)
## 
## Call:
## arima(x = working_hours_ts, order = c(2, 1, 0))
## 
## Coefficients:
##           ar1      ar2
##       -0.0207  -0.1521
## s.e.   0.0691   0.0689
## 
## sigma^2 estimated as 1.637:  log likelihood = -339.78,  aic = 685.56
## 
## Training set error measures:
##                      ME    RMSE       MAE        MPE      MAPE     MASE
## Training set 0.09728865 1.27648 0.4587968 0.08211823 0.4575043 1.022891
##                      ACF1
## Training set -0.008826234
# z-tests on coefficients: ar2 is significant at the 5% level
coeftest(model_AR210)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.020689   0.069078 -0.2995  0.76455  
## ar2 -0.152066   0.068861 -2.2083  0.02722 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Inverse-root plot for stability check
autoplot(model_AR210)

# BIC for comparison with the ARIMA(2,1,2) fit
BIC(model_AR210)
## [1] 695.5156
# Root test for ARIMA(2,1,0). Stationarity requires the roots of the AR
# polynomial 1 - phi1*z - phi2*z^2 to lie OUTSIDE the unit circle. The
# original concatenated c(1, -coef) and checked |root| < 1, which is the
# wrong criterion. A pure AR model has no MA part, so invertibility is
# automatic; stationarity is the property actually being checked here.
ar_coefs <- model_AR210$coef[grep("^ar", names(model_AR210$coef))]
if (all(abs(polyroot(c(1, -ar_coefs))) > 1)) {
  message("AR roots outside unit circle: model is stationary.")
} else {
  message("Model is not stationary.")
}

REGRESSION INTERPRETATIONS:

# R-Squared
# NOTE(review): pseudo R-squared on the LEVELS of the series — inflated by
# the trend for a d = 1 model; interpret with care.
n = length(working_hours_ts)
ar2_R2 = cor(fitted(model_AR210), working_hours_ts)^2  # 0.9618626
# Adjusted for the 2 estimated coefficients (ar1, ar2)
ar2_AR2 = 1 - (1-ar2_R2) * (n - 1) / (n - 2 - 1)
ar2_R2
## [1] 0.9617971
ar2_AR2
## [1] 0.9614188
# Get the residuals from the ARIMA model
ar_residuals <- residuals(model_AR210)

# NOTE(review): y is computed but never used below (n is recomputed instead)
y <- length(as.vector(working_hours_ts))

# Calculate the total sum of squares (TSS)
mean_y <- mean(as.vector(working_hours_ts))
tss <- sum((as.vector(working_hours_ts) - mean_y)^2)

# Calculate the residual sum of squares (RSS)
rss <- sum(ar_residuals^2)

# Calculate the number of predictors or parameters in the model (p)
p <- length(model_AR210$coef)

# Calculate the number of observations (n)
n <- length(working_hours_ts)

# Calculate R-squared
rsquared <- 1 - (rss / tss)

# Calculate the adjusted R-squared
adj_rsquared <- 1 - ((1 - rsquared) * (n - 1) / (n - p - 1))

# Print the calculated R-squared and adjusted R-squared values
print(rsquared)
## [1] 0.9615241
print(adj_rsquared)
## [1] 0.9611432
# Residuals check ACF/PACF
check_res(model_AR210) 
# Plot residuals in ACF/PACF (acf()/pacf() also draw as a side effect)
ar2_acf_residuals <- acf(model_AR210$residuals, main = "ACF Residuals")

ar2_pacf_residuals <- pacf(model_AR210$residuals, main = "PACF Residuals")

# 3-panel layout: residual ACF, residual PACF, and a normal Q-Q plot
par(mfrow = c(3,1) , mex = 0.6, cex = 0.8, cex.main = .9)
plot(ar2_acf_residuals)
plot(ar2_pacf_residuals)
qqnorm(residuals(model_AR210), main = 'ARMA(2,1,0) Residuals')
qqline(model_AR210$residuals, col = 'red')

# Q-Test: Ljung-Box on the AR(2) residuals at lag 3; large p-value indicates
# no remaining residual autocorrelation.
r_2_box_test <- Box.test(model_AR210$residuals, lag = 3, type = "Ljung")
r_2_box_test 
## 
##  Box-Ljung test
## 
## data:  model_AR210$residuals
## X-squared = 0.24939, df = 3, p-value = 0.9692
# Collect Ljung-Box p-values for lags 1..3. The plot below is labeled
# "p-value" with a 0.05 reference line, so store qt$p.value — the original
# stored the X-squared statistic, which does not match the plot.
QTest <- numeric(3)
for (i in 1:3) {
  qt <- Box.test(model_AR210$residuals, lag = i, type = "Ljung")
  QTest[i] <- qt$p.value
}

QTest
plot(QTest, main = "AR2 Q Test", xlab = "lag", ylab = "p-value")
abline(h = .05, col = "red", lty = 3)

# Forecast 6 months ahead from the ARIMA(2,1,0) fit
fcast_AR21 = forecast(model_AR210, h=6)
autoplot(fcast_AR21, include = 12, ylab = "Working hours - Base 2007 with 100") 

summary(fcast_AR21)
## 
## Forecast method: ARIMA(2,1,0)
## 
## Model Information:
## 
## Call:
## arima(x = working_hours_ts, order = c(2, 1, 0))
## 
## Coefficients:
##           ar1      ar2
##       -0.0207  -0.1521
## s.e.   0.0691   0.0689
## 
## sigma^2 estimated as 1.637:  log likelihood = -339.78,  aic = 685.56
## 
## Error measures:
##                      ME    RMSE       MAE        MPE      MAPE      MASE
## Training set 0.09728865 1.27648 0.4587968 0.08211823 0.4575043 0.1552653
##                      ACF1
## Training set -0.008826234
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Apr 2023       114.9173 113.2774 116.5571 112.4093 117.4252
## May 2023       114.9321 112.6369 117.2274 111.4219 118.4424
## Jun 2023       114.9292 112.2627 117.5957 110.8511 119.0073
## Jul 2023       114.9270 111.9302 117.9237 110.3439 119.5101
## Aug 2023       114.9275 111.6176 118.2373 109.8655 119.9894
## Sep 2023       114.9278 111.3330 118.5227 109.4300 120.4257
fcast_AR21_plot <- plot_forecast(fcast_AR21, title = "ARMA(2,1,0) h =6", 
                                                                  Ytitle = "index-Base 2007 = 100", Xtitle="Date")
fcast_AR21_plot
check_res(fcast_AR21)

The AIC is 685.56, both phi parameters are < 1 in absolute value, our adjusted r-squared is 0.9611432 and RMSE is 1.27648. Our Ljung-Box Q-test statistic was 0.2494 with a p-value of 0.9692 at three lags, indicating no residual serial correlation. The AIC was lower than the ARMA(2,1,2) and the adjusted r-squared was marginally better.

6 MA(2)

# Fit an MA model with order of 2 for both terms with the first difference.
model_MA012 = arima(working_hours_ts, order = c(0, 1, 2))
summary(model_MA012)
## 
## Call:
## arima(x = working_hours_ts, order = c(0, 1, 2))
## 
## Coefficients:
##           ma1      ma2
##       -0.0288  -0.1598
## s.e.   0.0691   0.0690
## 
## sigma^2 estimated as 1.635:  log likelihood = -339.62,  aic = 685.25
## 
## Training set error measures:
##                     ME     RMSE       MAE       MPE      MAPE     MASE
## Training set 0.1020634 1.275489 0.4621777 0.0863057 0.4608167 1.030429
##                      ACF1
## Training set -0.002921037
coeftest(model_MA012)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ma1 -0.028784   0.069097 -0.4166  0.67699  
## ma2 -0.159822   0.069021 -2.3156  0.02058 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
autoplot(model_MA012)

BIC(model_MA012)
## [1] 695.2043
# Root test for invertibility of the MA(2) part.
# An MA model with polynomial 1 + theta1*z + theta2*z^2 is invertible when
# EVERY root lies OUTSIDE the unit circle (|root| > 1).
# Fixed two bugs in the original: it negated the coefficients (arima()
# already reports theta with the sign used in 1 + theta1*B + theta2*B^2)
# and it tested |root| < 1, which reverses the condition.
ma_roots <- polyroot(c(1, model_MA012$coef))
if (all(Mod(ma_roots) > 1)) {
  message("Model is invertible.")
} else {
  message("Model is not invertible.")
}

REGRESSION INTERPRETATIONS:

# R-Squared
n = length(working_hours_ts)
ma2_R2 = cor(fitted(model_MA012), working_hours_ts)^2  # 0.9618626
ma2_AR2 = 1 - (1 - ma2_R2) * (n - 1) / (n - 2 - 1)
ma2_R2
## [1] 0.9618626
ma2_AR2
## [1] 0.961485
# Manual R-squared / adjusted R-squared for the ARIMA(0,1,2) fit.
# Get the residuals from the ARIMA model
ma_residuals <- residuals(model_MA012)

# NOTE(review): `y` is assigned but never used below — candidate for removal.
y <- length(as.vector(working_hours_ts))

# Calculate the total sum of squares (TSS)
# NOTE(review): as above, TSS is on the level series while RSS comes from
# residuals of a differenced model, so this R^2 is indicative only.
mean_y <- mean(as.vector(working_hours_ts))
tss <- sum((as.vector(working_hours_ts) - mean_y)^2)

# Calculate the residual sum of squares (RSS)
rss <- sum(ma_residuals^2)

# Calculate the number of predictors or parameters in the model (p)
# (p = 2 here: ma1 and ma2, per the coefficient table printed above)
p <- length(model_MA012$coef)

# Calculate the number of observations (n)
n <- length(working_hours_ts)

# Calculate R-squared: 1 - RSS/TSS
rsquared <- 1 - (rss / tss)

# Calculate the adjusted R-squared (penalises for the p estimated parameters)
adj_rsquared <- 1 - ((1 - rsquared) * (n - 1) / (n - p - 1))

# Print the calculated R-squared and adjusted R-squared values
print(rsquared)
## [1] 0.9615839
print(adj_rsquared)
## [1] 0.9612035
# Residuals check ACF/PACF
check_res(model_MA012)
# Plot residuals in ACF/PACF
# Residual diagnostics for the MA(2) model: ACF, PACF and a normal Q-Q plot.
MA012_acf_residuals <- acf(model_MA012$residuals, main = "ACF Residuals")

MA012_pacf_residuals <- pacf(model_MA012$residuals, main = "PACF Residuals")

# combine ACF/PACF with residuals using qqnorm.
par(mfrow = c(3,1) , mex = 0.6, cex = 0.8, cex.main = .9)
plot(MA012_acf_residuals)
plot(MA012_pacf_residuals)
# Fixed title: this section is the MA model ARMA(0,1,2), not ARMA(2,1,0).
qqnorm(residuals(model_MA012), main = 'ARMA(0,1,2) Residuals')
qqline(model_MA012$residuals, col = 'red')

# Ljung-Box Q-test on the MA(2) residuals at 3 lags: a large p-value means
# no evidence of serial correlation (residuals look independently distributed).
r_2_box_test <- Box.test(model_MA012$residuals, lag = 3, type = "Ljung")
r_2_box_test # X-squared = 0.12569, df = 3, p-value = 0.9886 - No serial correlation / residuals independently distributed
## 
##  Box-Ljung test
## 
## data:  model_MA012$residuals
## X-squared = 0.12569, df = 3, p-value = 0.9886
# Ljung-Box Q statistics and p-values for lags 1-3 on the MA(2) residuals.
# Fixed: the plot was drawn from the Q STATISTICS but labelled "p-value",
# which made the 0.05 reference line meaningless; the p-values are now
# collected separately and plotted instead.
QTest <- numeric(3)
QTest_p <- numeric(3)
for (i in 1:3) {
  qt <- Box.test(model_MA012$residuals, lag = i, type = "Ljung")
  QTest[i] <- qt$statistic
  QTest_p[i] <- qt$p.value
}

QTest 
## [1] 0.001774877 0.008936484 0.125689013
plot(QTest_p, main = "MA2 Q Test", xlab = "lag", ylab = "p-value")
abline(h = .05, col = "red", lty = 3) # 5% significance reference line

# Forecast 6 months
fcast_MA012 <- forecast(model_MA012, h=6)
autoplot(fcast_MA012, include = 12, ylab = "Working hours - Base 2007 with 100")

summary(fcast_MA012)
## 
## Forecast method: ARIMA(0,1,2)
## 
## Model Information:
## 
## Call:
## arima(x = working_hours_ts, order = c(0, 1, 2))
## 
## Coefficients:
##           ma1      ma2
##       -0.0288  -0.1598
## s.e.   0.0691   0.0690
## 
## sigma^2 estimated as 1.635:  log likelihood = -339.62,  aic = 685.25
## 
## Error measures:
##                     ME     RMSE       MAE       MPE      MAPE      MASE
## Training set 0.1020634 1.275489 0.4621777 0.0863057 0.4608167 0.1564094
##                      ACF1
## Training set -0.002921037
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Apr 2023       114.9123 113.2737 116.5508 112.4063 117.4183
## May 2023       114.9056 112.6214 117.1898 111.4122 118.3989
## Jun 2023       114.9056 112.2626 117.5485 110.8635 118.9476
## Jul 2023       114.9056 111.9470 117.8641 110.3809 119.4302
## Aug 2023       114.9056 111.6620 118.1491 109.9450 119.8661
## Sep 2023       114.9056 111.4001 118.4110 109.5444 120.2667
check_res(model_MA012)
# Interactive Chart
fcast_MA012_plot <- plot_forecast(fcast_MA012, title = "ARMA(0,1,2) h =6", 
                                 Ytitle = "index-Base 2007 = 100", Xtitle="Date")
fcast_MA012_plot

The AIC is 685.25, much better than MA(1) and ARMA(2,2), but only marginally better than AR(2). The adjusted r-squared is 0.961485, just slightly lower than AR(2) and ARMA(2,2), and the RMSE was 1.275489. The Ljung-Box test p-value looked good at 0.9886 at 3 lags.

Our preferred models are AR(2) and MA(2), as they have the highest adjusted R-squared and the lowest AIC/SIC of the three models.

REGRESSION INTERPRETATIONS:

7 Step 2 DTC Forecasting Environment

There are 205 observations and we’ll be doing a roughly 90% / 10% split, so our estimation sample will have 184 observations and our prediction sample will have 21 observations. We’ll be applying a fixed scheme that produces only one estimate and does not allow for parameter updating. We’ll be using a quadratic loss function, as over- and under-estimating the forecast carry equal penalty.

7.1 Data Split into Train & Test Samples

# split of time-series base time-series
length(working_hours_ts)
## [1] 205
train <- window(working_hours_ts,start=c(2006,3),end=c(2021,6)) # 184 observations
test <- window(working_hours_ts, start=c(2021,7),end=c(2023,3)) # 21 observations
length(train)
## [1] 184
length(test)
## [1] 21

8 ARMA(2,1,2)

arma22 <- arima(train, order = c(2,1,2))
autoplot(arma22)

coeftest(arma22)
## 
## z test of coefficients:
## 
##     Estimate Std. Error z value Pr(>|z|)
## ar1  0.18574    0.42339  0.4387   0.6609
## ar2  0.10823    0.36673  0.2951   0.7679
## ma1 -0.22179    0.41047 -0.5403   0.5890
## ma2 -0.27262    0.36487 -0.7472   0.4550
summary(arma22)
## 
## Call:
## arima(x = train, order = c(2, 1, 2))
## 
## Coefficients:
##          ar1     ar2      ma1      ma2
##       0.1857  0.1082  -0.2218  -0.2726
## s.e.  0.4234  0.3667   0.4105   0.3649
## 
## sigma^2 estimated as 1.78:  log likelihood = -312.48,  aic = 634.96
## 
## Training set error measures:
##                      ME     RMSE       MAE        MPE      MAPE     MASE
## Training set 0.07429913 1.330678 0.4737119 0.06076164 0.4778019 1.048238
##                      ACF1
## Training set -0.003449782
arma22fcast_acc_train <- as.data.frame(accuracy(arma22))

# Generate Forecast for Test Sample
arma22fcast <- Arima(test, model = arma22)
check_res(arma22fcast)
summary(arma22fcast)
## Series: test 
## ARIMA(2,1,2) 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2
##       0.1857  0.1082  -0.2218  -0.2726
## s.e.  0.0000  0.0000   0.0000   0.0000
## 
## sigma^2 = 1.78:  log likelihood = -17.03
## AIC=36.06   AICc=36.28   BIC=37.05
## 
## Training set error measures:
##                     ME      RMSE       MAE       MPE      MAPE      MASE
## Training set 0.4017321 0.5527739 0.4141204 0.3574678 0.3684362 0.1143277
##                    ACF1
## Training set -0.2294996
arma22fcast_acc_test <- as.data.frame(accuracy(arma22fcast))

# Extract Fitted Values from Forecast
arma22_forecast <- round(arma22fcast$fitted, digits = 5)

# Calculate Errors with Test Sample
arma22error <- round(test - arma22fcast$fitted, digits = 5)

# Calculate Loss with Test Sample.
arma22_loss <- round(arma22error^2, digits = 5)

# Calculate MSE
arma22_mse <- round(mean(arma22_loss), digits = 5)

# MPE Test
arma22mpetest <- lm(arma22error ~ 1)
summary(arma22mpetest)
## 
## Call:
## lm(formula = arma22error ~ 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46823 -0.33382  0.03659  0.14100  0.84373 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.4017     0.0849   4.732 0.000128 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3891 on 20 degrees of freedom
# Use Robust (heteroskedasticity-consistent) Standard Errors.
# Fixed: the original passed `vcoc = vcocHC(...)`. `vcoc` does not partially
# match coeftest's `vcov.` argument, so it was silently swallowed by `...`
# (and the misspelled vcocHC never evaluated) — plain OLS SEs were reported.
# NOTE: the output below predates the fix (non-robust SEs).
coeftest(arma22mpetest, vcov. = vcovHC(arma22mpetest, type = 'HC0'))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 0.401732   0.084903  4.7317 0.0001277 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Run Information Efficiency (I.E) Test
arma22ietest <- lm(arma22error ~ arma22fcast$fitted)
summary(arma22ietest)
## 
## Call:
## lm(formula = arma22error ~ arma22fcast$fitted)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51795 -0.30421 -0.02549  0.16559  0.78253 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)         6.48033    4.70550   1.377    0.184
## arma22fcast$fitted -0.05418    0.04194  -1.292    0.212
## 
## Residual standard error: 0.3827 on 19 degrees of freedom
## Multiple R-squared:  0.08076,    Adjusted R-squared:  0.03238 
## F-statistic: 1.669 on 1 and 19 DF,  p-value: 0.2118
# IET  = Extract all info. from residuals and put into forecast.  R
# Residuals should not be correlated with forecast.

# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(arma22ietest, vcov. = vcovHC(arma22ietest, type = 'HC0'))
## 
## t test of coefficients:
## 
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)         6.480334   4.705502  1.3772   0.1845
## arma22fcast$fitted -0.054184   0.041938 -1.2920   0.2118
# Create Data Table with Pertinent Values (actual, forecast, error, loss).
arma22_df <- as.data.frame(format(cbind(test, arma22_forecast, arma22error, arma22_loss),
                                  scientific = FALSE, nsmall = 5))
# Fixed: caption said "ARMA(0,1,2)" but this section is ARMA(2,1,2), and
# pageLength used nrow(test), which is NULL for a ts vector (use length()).
arma22_dt <- datatable(arma22_df, filter = "top",
                       options = list(pageLength = length(test), autoWidth = TRUE),
                       caption = "ARMA(2,1,2) Forecast, Error & Loss")
arma22_dt
# Create Data Table for Pertinent Fit Metrics (2 rows: train & test accuracy;
# fixed nrow(arma22_dt), which is NULL for an htmlwidget).
fit_arma22_dt <- datatable(round(rbind(arma22fcast_acc_train, arma22fcast_acc_test), digits = 5),
                           options = list(pageLength = 2, autoWidth = TRUE),
                           caption = "ARMA(2,1,2) Metrics In-Sample & Out-of-Sample")
fit_arma22_dt
# Interactive Plots
plot_arma22 <- cbind(test,arma22_forecast)
ts_plot(plot_arma22, width = 3,
        slider = FALSE, type = "single",
        Ytitle = "First Difference of Hours Worked Index",
        Xgrid = TRUE, Ygrid = TRUE,
        title = "ARMA(2,2) July 2021 - March 2023")

9 ARMA(2,1,0)

ar2 <- Arima(train, order = c(2,1,0))
autoplot(ar2)

coeftest(ar2)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.026227   0.072830 -0.3601  0.71877  
## ar2 -0.159992   0.072585 -2.2042  0.02751 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(ar2)
## Series: train 
## ARIMA(2,1,0) 
## 
## Coefficients:
##           ar1      ar2
##       -0.0262  -0.1600
## s.e.   0.0728   0.0726
## 
## sigma^2 = 1.81:  log likelihood = -312.98
## AIC=631.96   AICc=632.1   BIC=641.59
## 
## Training set error measures:
##                      ME     RMSE       MAE        MPE      MAPE      MASE
## Training set 0.06505855 1.334404 0.4638089 0.05273247 0.4675245 0.1687291
##                      ACF1
## Training set -0.007420601
ar2fcast_acc_train <- as.data.frame(accuracy(ar2))

# Generate Forecast for Test Sample
ar2fcast <- Arima(test, model = ar2)
summary(ar2fcast)
## Series: test 
## ARIMA(2,1,0) 
## 
## Coefficients:
##           ar1    ar2
##       -0.0262  -0.16
## s.e.   0.0000   0.00
## 
## sigma^2 = 1.81:  log likelihood = -15.65
## AIC=33.31   AICc=33.53   BIC=34.3
## 
## Training set error measures:
##                     ME      RMSE       MAE       MPE      MAPE      MASE
## Training set 0.3507459 0.5163881 0.3838474 0.3124509 0.3417404 0.1059701
##                    ACF1
## Training set -0.2198242
check_res(ar2fcast)
ar2fcast_acc_test <- as.data.frame(accuracy(ar2fcast))

# Extract Fitted Values from Forecast
ar2_forecast <- round(ar2fcast$fitted, digits = 5)

# Calculate Errors with Test Sample
ar2error <- round(test - ar2fcast$fitted, digits = 5)

# Calculate Loss with Prediction Sample.
ar2_loss <- round(ar2error^2, digits = 5)

# Calculate MSE
ar2_mse <- round(mean(ar2_loss), digits = 5)

# MPE Test
ar2mpetest <- lm(ar2error ~ 1)
summary(ar2mpetest)
## 
## Call:
## lm(formula = ar2error ~ 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4963 -0.3335  0.0080  0.1368  0.8054 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.35075    0.08474   4.139 0.000508 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3883 on 20 degrees of freedom
# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(ar2mpetest, vcov. = vcovHC(ar2mpetest, type = 'HC0'))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 0.350746   0.084745  4.1388 0.0005085 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Run Information Efficiency (I.E) Test
ar2ietest <- lm(ar2error ~ ar2fcast$fitted)
summary(ar2ietest)
## 
## Call:
## lm(formula = ar2error ~ ar2fcast$fitted)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55122 -0.29560 -0.02713  0.16375  0.73798 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)      7.28472    4.59076   1.587    0.129
## ar2fcast$fitted -0.06178    0.04090  -1.511    0.147
## 
## Residual standard error: 0.3765 on 19 degrees of freedom
## Multiple R-squared:  0.1072, Adjusted R-squared:  0.06024 
## F-statistic: 2.282 on 1 and 19 DF,  p-value: 0.1473
# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(ar2ietest, vcov. = vcovHC(ar2ietest, type = 'HC0'))
## 
## t test of coefficients:
## 
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      7.284717   4.590760  1.5868   0.1291
## ar2fcast$fitted -0.061781   0.040897 -1.5107   0.1473
# Create Data Table with Pertinent Values (actual, forecast, error, loss).
ar2_df <- as.data.frame(format(cbind(test, ar2_forecast, ar2error, ar2_loss),
                               scientific = FALSE, nsmall = 5))
# Fixed: pageLength used nrow(test), which is NULL for a ts vector.
ar2_dt <- datatable(ar2_df, filter = "top",
                    options = list(pageLength = length(test), autoWidth = TRUE),
                    caption = "AR(2) Forecast, Error & Loss")
ar2_dt
# Create Data Table for Pertinent Fit Metrics (2 rows: train & test accuracy;
# fixed nrow(ar2_dt), which is NULL for an htmlwidget, and the caption).
fit_ar2_dt <- datatable(round(rbind(ar2fcast_acc_train, ar2fcast_acc_test), digits = 5),
                        options = list(pageLength = 2, autoWidth = TRUE),
                        caption = "AR(2) Metrics In-Sample & Out-of-Sample")
fit_ar2_dt
# Interactive Plots
plot_ar2 <- cbind(test,ar2_forecast)
ts_plot(plot_ar2, width = 3,
        slider = FALSE, type = "single",
        Ytitle = "First Difference of Hours Worked Index",
        Xgrid = TRUE, Ygrid = TRUE,
        title = "AR(2) July 2021 - March 2023")

9.1 MA(2)

ma2 <- Arima(train, order = c(0,1,2))
autoplot(ma2)

coeftest(ma2)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ma1 -0.041101   0.073135 -0.5620   0.5741  
## ma2 -0.176453   0.074460 -2.3698   0.0178 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(ma2)
## Series: train 
## ARIMA(0,1,2) 
## 
## Coefficients:
##           ma1      ma2
##       -0.0411  -0.1765
## s.e.   0.0731   0.0745
## 
## sigma^2 = 1.804:  log likelihood = -312.69
## AIC=631.37   AICc=631.51   BIC=641
## 
## Training set error measures:
##                      ME     RMSE       MAE        MPE      MAPE      MASE
## Training set 0.06936323 1.332218 0.4678935 0.05643503 0.4717356 0.1702151
##                     ACF1
## Training set 0.003841594
ma2fcast_acc_train <- as.data.frame(accuracy(ma2))

# Generate Forecast for Test Sample
ma2fcast <- Arima(test, model = ma2)
summary(ma2fcast)
## Series: test 
## ARIMA(0,1,2) 
## 
## Coefficients:
##           ma1      ma2
##       -0.0411  -0.1765
## s.e.   0.0000   0.0000
## 
## sigma^2 = 1.804:  log likelihood = -16.28
## AIC=34.56   AICc=34.78   BIC=35.56
## 
## Training set error measures:
##                     ME      RMSE       MAE       MPE      MAPE     MASE
## Training set 0.3750574 0.5326153 0.3967854 0.3339685 0.3531628 0.109542
##                    ACF1
## Training set -0.2126501
check_res(ma2fcast)
ma2fcast_acc_test <- as.data.frame(accuracy(ma2fcast))

# Extract Fitted Values from Forecast
ma2_forecast <- round(ma2fcast$fitted, digits = 5)

# Calculate Errors with Test Sample
ma2error <- round(test - ma2fcast$fitted, digits = 5)

# Calculate Loss with Test Sample.
ma2_loss <- round(ma2error^2, digits = 5)

# Calculate MSE
ma2_mse <- round(mean(ma2_loss), digits = 5)

# MPE Test
ma2mpetest <- lm(ma2error ~ 1)
summary(ma2mpetest)
## 
## Call:
## lm(formula = ma2error ~ 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46719 -0.32702  0.00241  0.15021  0.81384 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.37506    0.08456   4.435 0.000254 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3875 on 20 degrees of freedom
# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(ma2mpetest, vcov. = vcovHC(ma2mpetest, type = 'HC0'))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 0.375058   0.084561  4.4353 0.0002544 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Run Information Efficiency (I.E) Test
ma2ietest <- lm(ma2error ~ ma2fcast$fitted)
summary(ma2ietest)
## 
## Call:
## lm(formula = ma2error ~ ma2fcast$fitted)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52179 -0.30400 -0.02745  0.16264  0.74848 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)      7.04489    4.61228   1.527    0.143
## ma2fcast$fitted -0.05944    0.04110  -1.446    0.164
## 
## Residual standard error: 0.3773 on 19 degrees of freedom
## Multiple R-squared:  0.09918,    Adjusted R-squared:  0.05177 
## F-statistic: 2.092 on 1 and 19 DF,  p-value: 0.1644
# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(ma2ietest, vcov. = vcovHC(ma2ietest, type = 'HC0'))
## 
## t test of coefficients:
## 
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      7.044895   4.612284  1.5274   0.1431
## ma2fcast$fitted -0.059440   0.041097 -1.4463   0.1644
# Create Data Table with Pertinent Values (actual, forecast, error, loss).
ma2_df <- as.data.frame(format(cbind(test, ma2_forecast, ma2error, ma2_loss),
                               scientific = FALSE, nsmall = 5))
# Fixed: pageLength used nrow(test), which is NULL for a ts vector.
ma2_dt <- datatable(ma2_df, filter = "top",
                    options = list(pageLength = length(test), autoWidth = TRUE),
                    caption = "MA(2) Forecast, Error & Loss")
ma2_dt
# Create Data Table for Pertinent Fit Metrics (2 rows: train & test accuracy;
# fixed nrow(ma2_dt), which is NULL for an htmlwidget, and the "ARMA(2)" caption).
fit_ma2_dt <- datatable(round(rbind(ma2fcast_acc_train, ma2fcast_acc_test), digits = 5),
                        options = list(pageLength = 2, autoWidth = TRUE),
                        caption = "MA(2) Metrics In-Sample & Out-of-Sample")
fit_ma2_dt
# Interactive Plots
plot_ma2 <- cbind(test,ma2_forecast)
ts_plot(plot_ma2, width = 3,
        slider = FALSE, type = "single",
        Ytitle ="First Difference of Hours Worked Index",
        Xgrid = TRUE, Ygrid = TRUE,
        title = "MA(2) July 2021 - March 2023")

10 Simple Average 4 Naive Forecast

f4 <- numeric(21) # Test Sample = 21

# 4-observation simple-average naive forecast for each test point.
# Fixed off-by-one: test[i] is working_hours_ts[184 + i], so the forecast
# must average the four observations BEFORE it (indices 180+i .. 183+i).
# The original averaged 181+i .. 184+i, which includes the target value
# itself (look-ahead bias).
for (i in 1:21) {
  f4[i] <- (working_hours_ts[180 + i] + working_hours_ts[181 + i] +
              working_hours_ts[182 + i] + working_hours_ts[183 + i]) / 4
}

f4error <- round(test-f4, digits = 3)
f4loss <- round(f4error^2, digits = 3)
mse_f4 <- mean(f4error^2) 
mpe_test_f4 <- lm(f4error~1) 
summary(mpe_test_f4) 
## 
## Call:
## lm(formula = f4error ~ 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57738 -0.27738  0.04762  0.24762  0.57262 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.50238    0.06487   7.744 1.92e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2973 on 20 degrees of freedom
# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(mpe_test_f4, vcov. = vcovHC(mpe_test_f4, type = 'HC0'))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 0.502381   0.064869  7.7445 1.918e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Combine Test, Forecast, Error, Loss into one Data Frame
final_f4 <- datatable(cbind(test, f4, f4error, f4loss), 
                      caption = "Simple Average 4 Naive Forecast: July 2021 - March 2023")
final_f4 
IET_test <- lm(f4error ~ f4)
summary(IET_test)
## 
## Call:
## lm(formula = f4error ~ f4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39697 -0.16938  0.01769  0.20723  0.39289 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 10.28910    2.78091    3.70  0.00152 **
## f4          -0.08732    0.02481   -3.52  0.00229 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2373 on 19 degrees of freedom
## Multiple R-squared:  0.3947, Adjusted R-squared:  0.3628 
## F-statistic: 12.39 on 1 and 19 DF,  p-value: 0.00229
# Use Robust Standard Errors.
# Fixed TWO bugs: (1) the covariance was computed from the wrong model
# (ma2ietest instead of IET_test); (2) misspelled `vcoc`/`vcocHC` meant the
# robust matrix was silently ignored anyway. NOTE: output predates the fix.
coeftest(IET_test, vcov. = vcovHC(IET_test, type = 'HC0'))
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 10.289103   2.780912  3.6999  0.00152 **
## f4          -0.087316   0.024807 -3.5199  0.00229 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Create Data Table for Pertinent Fit Metrics
fit_MA4_metrics <- datatable(round(accuracy(f4, test), digits = 5),
                      caption = "Simple Average 4 Naive Forecast: July 2021 - March 2023")
fit_MA4_metrics
# Combine Test, Forecast, Error, MSE, Loss into One Data Frame
f4_dt <- datatable(round(cbind(test, f4, f4error, f4loss), digits = 5),
                   caption = "Simple Average 4 Naive Forecast: July 2021 - March 2023")
f4_dt
# Plots
f4_plot <- cbind(test, f4)

ts_plot(f4_plot, width = 3, slider = FALSE, type = "single", Ytitle = "First Difference of Working Hours Index",
        Xgrid = TRUE, Ygrid = TRUE, title = "Simple Naive Last 4 Observations Predicted Vs. Actual")

11 Graph of ARMA(2,2), AR(2), MA(2) & Simple Naive Last 4 Observations Graph

# Bind All Forecasts for ARMA(2,2), AR(2), MA(2) and Simple Naive 4 together
our_models <- cbind(test, arma22_forecast, ar2_forecast, ma2_forecast, f4)
# Fixed title typo: "July 2021 = March 2023" -> "July 2021 - March 2023".
plot_our_models <- ts_plot(our_models, slider = FALSE, type = "single",
                           Xgrid = TRUE, Ygrid = TRUE,
                           title = "All Forecasts July 2021 - March 2023")
plot_our_models

12 Equal Weight Combined Forecast

# Combine the forecasts using the EQUAL-WEIGHT combination scheme ------------
# (the original comment said "OLS weighted", but no regression is fitted
#  here — the OLS-weighted combination is done in section 14)
# 4 models, so n=4 and each weight is 1/n = 0.25

ew_comb_fcast <- numeric(21)

for (i in 1:21) {
  ew_comb_fcast[i] <- 0.25 * arma22_forecast[i] + 0.25 * ar2_forecast[i] +
                        0.25 * ma2_forecast[i] + 0.25 * f4[i]
}

# Error, Loss, MSE and MPE test
ew_error <- round(test-ew_comb_fcast, digits = 3)
ew_loss <- round(ew_error^2, digits = 3)
ew_mse_combo <- mean(ew_error^2)  
ew_mpe_test_combo <- lm(ew_error~1)
summary(ew_mpe_test_combo)
## 
## Call:
## lm(formula = ew_error ~ 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46148 -0.32348 -0.05748  0.19352  0.68352 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.40748    0.07634   5.338 3.18e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3498 on 20 degrees of freedom
# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(ew_mpe_test_combo, vcov. = vcovHC(ew_mpe_test_combo, type = 'HC0'))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 0.407476   0.076338  5.3378 3.178e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ew_IET_test <- lm(ew_error ~ ew_comb_fcast)
summary(ew_IET_test)
## 
## Call:
## lm(formula = ew_error ~ ew_comb_fcast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49381 -0.27223 -0.03639  0.15936  0.62209 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    7.51561    4.03250   1.864   0.0779 .
## ew_comb_fcast -0.06336    0.03594  -1.763   0.0940 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3327 on 19 degrees of freedom
## Multiple R-squared:  0.1406, Adjusted R-squared:  0.09536 
## F-statistic: 3.108 on 1 and 19 DF,  p-value: 0.09398
# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(ew_IET_test, vcov. = vcovHC(ew_IET_test, type = 'HC0'))
## 
## t test of coefficients:
## 
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    7.515605   4.032496  1.8638  0.07788 .
## ew_comb_fcast -0.063365   0.035941 -1.7630  0.09398 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Combine Test, Equal Weight Forecast, Error, Loss into table
ew_combo_fcast21_df <- datatable(round(cbind(test,ew_comb_fcast, ew_error, ew_loss), digits = 3),
  caption = "July 2021-March 2023: Equal Weight ARMA(2,2), AR(2), MA(2), Simple Naive Last 4 observations")
ew_combo_fcast21_df
# Create Data Table for Pertinent Fit Metrics
fit_ew_comb_metrics <- datatable(round(accuracy(ew_comb_fcast, test), digits = 5),
        caption = "July 2021-March 2023: Equal Weight ARMA(2,2), AR(2), MA(2), Simple Naive Last 4 observations")
fit_ew_comb_metrics
# Plot Equal Weight Combined Forecast
ew_comb_plot <- cbind(test, ew_comb_fcast)

ew_plot <- ts_plot(ew_comb_plot, width = 3, slider = FALSE, type = "single", 
                    Ytitle = "First Difference of Working Hours Index",
                    Xtitle = "Date",
                    Xgrid = TRUE, Ygrid = TRUE, 
                    title = "July 2021-March 2023: Equal Weight ARMA(2,2), AR(2), MA(2), Simple Naive Last 4 observations")
ew_plot

13 Inverse MSE Forecast

# Inverse-MSE combination: each model's weight is its inverse out-of-sample
# MSE normalised by the sum of all four inverse MSEs, so more accurate
# models receive proportionally larger weights (the weights sum to 1).
imse_comb_fcast <- numeric(21)

imse_arma212 <- (1/arma22_mse) / ((1/arma22_mse) + (1/ar2_mse) + (1/ma2_mse) + (1/mse_f4))
imse_arma210 <- (1/ar2_mse) / ((1/ar2_mse) + (1/arma22_mse) + (1/ma2_mse) + (1/mse_f4))
imse_arma012 <- (1/ma2_mse) / ((1/ma2_mse) + (1/arma22_mse) + (1/ar2_mse) + (1/mse_f4))
imse_sma4 <- (1/mse_f4) / ((1/mse_f4) + (1/arma22_mse) + (1/ar2_mse) + (1/ma2_mse))

# Weighted average of the four model forecasts at each test point.
for (i in 1:21){
     imse_comb_fcast[i] <- (imse_arma212 * arma22_forecast[i] + 
                                  imse_arma210 * ar2_forecast[i] +
                                  imse_arma012 * ma2_forecast[i] +
                                  imse_sma4 * f4[i])
}

# Error, Loss, MSE and MPE test
imse_error <- round(test-imse_comb_fcast, digits = 3)
imse_loss <- round(imse_error^2, digits = 3)
imse_combo_mse <- mean(imse_error^2)
mpe_test_combo_inverse <- lm(imse_error~1)
summary(mpe_test_combo_inverse)
## 
## Call:
## lm(formula = imse_error ~ 1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45667 -0.32367 -0.04967  0.18733  0.70033 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.40267    0.07727   5.211 4.24e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3541 on 20 degrees of freedom
# Use Robust Standard Errors.
# Fixed misspelled `vcoc`/`vcocHC` (silently ignored via `...`, so plain OLS
# SEs were reported). NOTE: output below predates the fix.
coeftest(mpe_test_combo_inverse, vcov. = vcovHC(mpe_test_combo_inverse, type = 'HC0'))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 0.402667   0.077267  5.2114 4.236e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
imse_IET_test <- lm(imse_error ~ imse_comb_fcast)
summary(imse_IET_test)
## 
## Call:
## lm(formula = imse_error ~ imse_comb_fcast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49911 -0.27493 -0.03311  0.15960  0.63844 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      7.44844    4.10027   1.817   0.0851 .
## imse_comb_fcast -0.06281    0.03654  -1.719   0.1019  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.338 on 19 degrees of freedom
## Multiple R-squared:  0.1345, Adjusted R-squared:  0.08899 
## F-statistic: 2.954 on 1 and 19 DF,  p-value: 0.1019
# Use Robust Standard Errors.
# Fixed TWO bugs: (1) the wrong model was tested (IET_test — the naive-4
# regression, whose f4 coefficients appear in the stale output below —
# instead of imse_IET_test); (2) misspelled `vcoc`/`vcocHC` meant the
# robust matrix was silently ignored anyway.
coeftest(imse_IET_test, vcov. = vcovHC(imse_IET_test, type = 'HC0'))
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 10.289103   2.780912  3.6999  0.00152 **
## f4          -0.087316   0.024807 -3.5199  0.00229 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Combine Test, Equal Weight Forecast, Error, Loss into Table
imse_comb_final <- datatable(round(cbind(test,imse_comb_fcast, imse_error, imse_loss),digits = 3),
                caption = "Inverse MSE Combined Forecast for July 2021 - Mar 2023")
imse_comb_final
# Create Data Table for Pertinent Fit Metrics
fit_imse_comb_metrics <- datatable(round(accuracy(imse_comb_fcast, test), digits = 5),
                      caption = "Inverse MSE Combined Forecast for July 2021 - Mar 2023")
fit_imse_comb_metrics
# Plot Inverse MSE Forecast
imse_comb_plot <- cbind(test, imse_comb_fcast)

imse_plot <- ts_plot(imse_comb_plot, width = 3, slider = FALSE, type = "single", 
                    Ytitle = "First Difference of Working Hours Index",
                    Xgrid = TRUE, Ygrid = TRUE, 
                    title = "July 2021-March 2023: Inverse MSE of ARMA(2,2), AR(2),
                    MA(2), Simple Naive Last 4 observations")
imse_plot

14 OLS Weighted Forecast

g0<-window(working_hours_ts,start=c(2021,7))
comb_ols <-lm(g0 ~ arma22_forecast + ar2_forecast + ma2_forecast + f4) # forecasts of 4 models
summary(comb_ols)
## 
## Call:
## lm(formula = g0 ~ arma22_forecast + ar2_forecast + ma2_forecast + 
##     f4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4761 -0.1770  0.0592  0.1907  0.3213 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      11.1407     3.4332   3.245  0.00507 ** 
## arma22_forecast   3.8070     3.7396   1.018  0.32382    
## ar2_forecast      4.7128     4.2778   1.102  0.28690    
## ma2_forecast     -8.9425     7.1619  -1.249  0.22976    
## f4                1.3283     0.2472   5.373 6.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2355 on 16 degrees of freedom
## Multiple R-squared:  0.9885, Adjusted R-squared:  0.9856 
## F-statistic: 344.4 on 4 and 16 DF,  p-value: 2.689e-15
# Turn the combination regression's fitted values into a monthly ts aligned
# with the start of the hold-out window (July 2021).
fcast_ols <- ts(comb_ols$fitted.values, frequency = 12, start = c(2021, 7))

# Out-of-sample diagnostics: forecast error, squared-error loss, and MSE.
ols_error <- test - fcast_ols
ols_loss <- ols_error ^ 2
ols_combo_mse <- mean(ols_loss)

# Mean Prediction Error test: regress the error on a constant; an intercept
# statistically indistinguishable from zero indicates an unbiased forecast.
mpe_test_combo_ols <- lm(ols_error ~ 1)
summary(mpe_test_combo_ols)
## 
## Call:
## lm(formula = ols_error ~ 1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4761 -0.1770  0.0592  0.1907  0.3213 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.753e-16  4.596e-02       0        1
## 
## Residual standard error: 0.2106 on 20 degrees of freedom
# Use heteroskedasticity-robust (White HC0) standard errors for the MPE test.
# Fixed: the original passed `vcoc = vcocHC(...)` -- both names are typos.
# `vcoc` does not partially match coeftest's `vcov.` argument, so the
# misspelled expression was swallowed (unevaluated) by `...` and ordinary
# OLS standard errors were silently used instead of robust ones.
coeftest(mpe_test_combo_ols, vcov. = vcovHC(mpe_test_combo_ols, type = 'HC0'))
## 
## t test of coefficients:
## 
##                Estimate  Std. Error t value Pr(>|t|)
## (Intercept) -6.7533e-16  4.5963e-02       0        1
# Informational Efficiency Test for the OLS-combined forecast: regress the
# forecast's own error on the forecast; a zero slope means the forecast
# already incorporates all of its own information.
# Fixed: the original regressed `imse_error` (the inverse-MSE combination's
# error) on `fcast_ols` -- a copy/paste slip from the previous section; the
# OLS combination's error (`ols_error`) is the relevant series here.
ols_IET_test <- lm(ols_error ~ fcast_ols)
summary(ols_IET_test)
## 
## Call:
## lm(formula = imse_error ~ fcast_ols)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48432 -0.29463 -0.02969  0.14367  0.68312 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  5.92958    4.50492   1.316    0.204
## fcast_ols   -0.04909    0.04001  -1.227    0.235
## 
## Residual standard error: 0.3497 on 19 degrees of freedom
## Multiple R-squared:  0.07342,    Adjusted R-squared:  0.02466 
## F-statistic: 1.506 on 1 and 19 DF,  p-value: 0.2348
# Interactive table for the hold-out window: realized values, OLS combined
# forecast, forecast error, and squared-error loss (3 decimals).
ols_results_matrix <- round(
  cbind(test, fcast_ols, ols_error, ols_loss),
  digits = 3
)
ols_comb_final <- datatable(
  ols_results_matrix,
  caption = "OLS Combined Forecast for July 2021 - Mar 2023"
)
ols_comb_final
# Interactive table of out-of-sample accuracy metrics for the OLS combined
# forecast, rounded to 5 decimals.
# Fixed: the caption previously read "Inverse MSE Combined Forecast ..." --
# a copy/paste slip from the prior section; this table reports the OLS
# combination's fit metrics.
fit_ols_comb_metrics <- datatable(
  round(accuracy(fcast_ols, test), digits = 5),
  caption = "OLS Combined Forecast for July 2021 - Mar 2023"
)
fit_ols_comb_metrics
# Overlay the OLS combined forecast on the realized test series.
ols_comb_plot <- cbind(test, fcast_ols)

ols_plot <- ts_plot(
  ols_comb_plot,
  width = 3,
  slider = FALSE,
  type = "single",
  Ytitle = "First Difference of Working Hours Index",
  Xgrid = TRUE,
  Ygrid = TRUE,
  title = "July 2021-March 2023: OLS of ARMA(2,2), AR(2),MA(2), Simple Naive Last 4 observations"
)
ols_plot

15 Graph of All Models

# Single chart overlaying every individual and combined forecast on the
# realized test series.
all_models <- cbind(
  test, arma22_forecast, ar2_forecast, ma2_forecast, f4,
  ew_comb_fcast, imse_comb_fcast, fcast_ols
)
all_models_plot <- ts_plot(
  all_models,
  width = 2,
  slider = FALSE,
  type = "single",
  Ytitle = "First Difference of Working Hours Index",
  Xgrid = TRUE,
  Ygrid = TRUE,
  title = "July 2021-March 2023: All Models"
)
all_models_plot

16 Graph Combination Models Vs. Test

# Single chart comparing only the three combination forecasts (equal-weight,
# inverse-MSE, OLS) against the realized test series.
combo_models <- cbind(test, ew_comb_fcast, imse_comb_fcast, fcast_ols)
combo_models_plot <- ts_plot(
  combo_models,
  width = 2,
  slider = FALSE,
  type = "single",
  Ytitle = "First Difference of Working Hours Index",
  Xgrid = TRUE,
  Ygrid = TRUE,
  title = "July 2021-March 2023: Combination Models"
)
combo_models_plot

17 Table for All Model Results

# One wide interactive table collecting, per model, its forecast, error, and
# squared loss next to the realized test values; column filters on top and
# a single page showing every test-window observation.
model_columns <- cbind(
  test,
  arma22_forecast, arma22error, arma22_loss,
  ar2_forecast, ar2error, ar2_loss,
  ma2_forecast, ma2error, ma2_loss,
  f4, f4error, f4loss,
  ew_comb_fcast, ew_error, ew_loss,
  imse_comb_fcast, imse_error, imse_loss,
  fcast_ols, ols_error, ols_loss
)
summary_table <- datatable(
  model_columns,
  filter = "top",
  options = list(pageLength = nrow(test), autoWidth = TRUE),
  caption = "Summary Table : All Models"
)
summary_table

18 All Models Out-of-Sample MSE

# Side-by-side out-of-sample MSE for every individual and combined model,
# rounded to 5 decimals.
mse_row <- cbind(
  arma22_mse, ar2_mse, ma2_mse, mse_f4,
  ew_mse_combo, imse_combo_mse, ols_combo_mse
)
model_all_mse <- datatable(round(mse_row, digits = 5),
                           caption = "All Models MSE")
model_all_mse